Read in counts data for all samples

#setwd("C:/Users/elkin/Desktop/Git_Projects/rat_tce_rnaseq/results")
load(file="tce_rat_counts.rda")# all samples from merged count files
dim(all_samples_1_48)
>>> [1] 32011    54

Assign metadata to count files

#setwd("C:/Users/elkin/Desktop/Git_Projects/rat_tce_rnaseq/results")
meta.rat<-read.csv(file="metadata.csv", stringsAsFactors=F)#load metadata
counts.rat<-all_samples_1_48[,7:54]#make gene symbols and counts only
rownames(counts.rat)<-all_samples_1_48[,1]
colnames(counts.rat)<-meta.rat[,1]
dim(counts.rat)
>>> [1] 32011    48
datatable(counts.rat)

Formatting metadata for DESeq2

coldata_48<- read.csv("Coldata_48.txt",header=T,sep="\t",stringsAsFactors=F) #For 48 samples
dim(coldata_48)
>>> [1] 48  4
coldata_48<-data.frame(coldata_48) 
coldata_48$Sex<-as.factor(coldata_48$Sex)
coldata_48$Rep<-as.factor(coldata_48$Rep)
coldata_48$Trt<-as.factor(coldata_48$Trt)
coldata_48$Trt<-relevel(factor(coldata_48$Trt), ref="C")
dim(coldata_48)
>>> [1] 48  4
head(coldata_48)
>>>   Sex Rep Trt dam
>>> 1   M   1   C   a
>>> 2   M   2   C   b
>>> 3   M   3   C   c
>>> 4   M   4   C   d
>>> 5   M   5   C   e
>>> 6   M   6   C   f

Creating DESeq2 object with counts data (without RUVr correction)

ds.all<-DESeqDataSetFromMatrix(counts.rat [,1:48],colData=coldata_48,design=~Rep+Trt)
rownames(ds.all)<-all_samples_1_48[,1]

Adding RUV to DEseq object

ds <- ds.all
ds$Trt<- as.factor(ds$Trt)
design(ds) <- ~ Rep + Trt
set <- newSeqExpressionSet(counts(ds, normalized=F), phenoData=AnnotatedDataFrame(data=as.data.frame(colData(ds))))
y <- DGEList(counts=counts(ds, normalized=F), group=ds$Trt)
design <- model.matrix(~ds$Rep+ds$Trt)
y <- calcNormFactors(y, method="upperquartile")
y <- estimateGLMCommonDisp(y, design)
y <- estimateGLMTagwiseDisp(y, design)
fit <- glmFit(y, design)
resid <- residuals(fit, type="deviance")
ruv <- RUVr(x=set, cIdx=rownames(set), k=1, residuals=resid)
pData(ruv)
>>>                   Sex Rep     Trt dam          W_1
>>> N1_Control_Male     M   1       C   a -0.173385717
>>> N2_Control_Male     M   2       C   b  0.095977919
>>> N3_Control_Male     M   3       C   c  0.115402694
>>> N4_Control_Male     M   4       C   d -0.048217654
>>> N5_Control_Male     M   5       C   e  0.139165154
>>> N6_Control_Male     M   6       C   f  0.169629716
>>> N1_Control_Female   F   1       C   a -0.280283445
>>> N2_Control_Female   F   2       C   b -0.061672673
>>> N3_Control_Female   F   3       C   c -0.014556129
>>> N4_Control_Female   F   4       C   d -0.092380467
>>> N5_Control_Female   F   5       C   e  0.009302410
>>> N6_Control_Female   F   6       C   f  0.189290518
>>> N1_TCE_Male         M   1     TCE   g  0.031492322
>>> N2_TCE_Male         M   2     TCE   h -0.022613196
>>> N3_TCE_Male         M   3     TCE   i -0.046742082
>>> N4_TCE_Male         M   4     TCE   j  0.141470959
>>> N5_TCE_Male         M   5     TCE   k -0.187389881
>>> N6_TCE_Male         M   6     TCE   l -0.255424655
>>> N1_TCE_Female       F   1     TCE   g  0.142581537
>>> N2_TCE_Female       F   2     TCE   h  0.068782585
>>> N3_TCE_Female       F   3     TCE   i  0.131544243
>>> N4_TCE_Female       F   4     TCE   j  0.055772940
>>> N5_TCE_Female       F   5     TCE   k -0.054589124
>>> N6_TCE_Female       F   6     TCE   l -0.022547890
>>> N1_NAC_Male         M   1     NAC   m -0.196949899
>>> N2_NAC_Male         M   2     NAC   n  0.282700072
>>> N3_NAC_Male         M   3     NAC   o -0.162590824
>>> N4_NAC_Male         M   4     NAC   p  0.148027133
>>> N5_NAC_Male         M   5     NAC   q  0.072678314
>>> N6_NAC_Male         M   6     NAC   r  0.110705677
>>> N1_NAC_Female       F   1     NAC   m  0.100984751
>>> N2_NAC_Female       F   2     NAC   n -0.256488953
>>> N3_NAC_Female       F   3     NAC   o  0.218244316
>>> N4_NAC_Female       F   4     NAC   p -0.284915981
>>> N5_NAC_Female       F   5     NAC   q -0.009360384
>>> N6_NAC_Female       F   6     NAC   r -0.061986807
>>> N1_TCE_NAC_Male     M   1 TCE_NAC   s  0.223125362
>>> N2_TCE_NAC_Male     M   2 TCE_NAC   t -0.004343612
>>> N3_TCE_NAC_Male     M   3 TCE_NAC   u  0.053823502
>>> N4_TCE_NAC_Male     M   4 TCE_NAC   v  0.064296168
>>> N5_TCE_NAC_Male     M   5 TCE_NAC   w  0.042999925
>>> N6_TCE_NAC_Male     M   6 TCE_NAC   x -0.037427622
>>> N1_TCE_NAC_Female   F   1 TCE_NAC   s  0.196743794
>>> N2_TCE_NAC_Female   F   2 TCE_NAC   t -0.145893575
>>> N3_TCE_NAC_Female   F   3 TCE_NAC   u -0.268744168
>>> N4_TCE_NAC_Female   F   4 TCE_NAC   v -0.030933400
>>> N5_TCE_NAC_Female   F   5 TCE_NAC   w  0.015145223
>>> N6_TCE_NAC_Female   F   6 TCE_NAC   x -0.100449098
#make deseq new deseq object that has the RUV factors
ds.all.ruv<- DESeqDataSetFromMatrix(countData = counts(ruv), colData = pData(ruv), 
                                    design = ~ W_1 + Rep + Trt)
head(ds.all.ruv)
>>> class: DESeqDataSet 
>>> dim: 6 48 
>>> metadata(1): version
>>> assays(1): counts
>>> rownames(6): LOC108349478 LOC103690911 ... LOC102556098 LOC103690072
>>> rowData names(0):
>>> colnames(48): N1_Control_Male N2_Control_Male ... N5_TCE_NAC_Female
>>>   N6_TCE_NAC_Female
>>> colData names(5): Sex Rep Trt dam W_1

PCA Plots with RUVr correction

rld.ds.all.ruv<- rlogTransformation(ds.all.ruv) #log transform
DESeq2::plotPCA(rld.ds.all.ruv, intgroup="Trt")

DESeq2::plotPCA(rld.ds.all.ruv, intgroup="Rep")

DESeq2::plotPCA(rld.ds.all.ruv, intgroup="Sex")

# Manually filtering out genes with count<6 
keep <- rowMeans(counts(ds.all.ruv)) >= 6
ds.all.ruv <- ds.all.ruv[keep,]
dim (ds.all.ruv)
>>> [1] 16661    48

Run DEseq2 Analysis with RUV (males)

ds.ruv.male<- ds.all.ruv[ , ds.all.ruv$Sex == "M" ] #subsetting DESeq2 object by sex
head(ds.ruv.male) #confirming male only samples
>>> class: DESeqDataSet 
>>> dim: 6 24 
>>> metadata(1): version
>>> assays(1): counts
>>> rownames(6): LOC108349479 LOC102556098 ... Raet1e LOC102547056
>>> rowData names(0):
>>> colnames(24): N1_Control_Male N2_Control_Male ... N5_TCE_NAC_Male
>>>   N6_TCE_NAC_Male
>>> colData names(5): Sex Rep Trt dam W_1
ds.ruv.male<- DESeq(ds.ruv.male) # with RUV Does the Empirical Bayes test
ds.ruv.male<- nbinomWaldTest(ds.ruv.male, maxit=10000) # Corrects Empirical Bayes test warning
ds.male.ruv.tce<- results(ds.ruv.male, contrast =c("Trt","TCE","C"), independentFiltering = FALSE)# Extract results table
ds.male.ruv.nac<- results(ds.ruv.male, contrast =c("Trt","NAC","C"), independentFiltering = FALSE) # Extract results table
ds.male.ruv.tce.nac<- results(ds.ruv.male, contrast =c("Trt","TCE_NAC","C"), independentFiltering = FALSE)# Extract results table

Resultss (males)

#Control v. TCE (male)
mcols(ds.male.ruv.tce)
>>> DataFrame with 6 rows and 2 columns
>>>                        type                               description
>>>                 <character>                               <character>
>>> baseMean       intermediate mean of normalized counts for all samples
>>> log2FoldChange      results      log2 fold change (MLE): Trt TCE vs C
>>> lfcSE               results              standard error: Trt TCE vs C
>>> stat                results              Wald statistic: Trt TCE vs C
>>> pvalue              results           Wald test p-value: Trt TCE vs C
>>> padj                results                      BH adjusted p-values
summary(ds.male.ruv.tce,alpha=0.05)
>>> 
>>> out of 16661 with nonzero total read count
>>> adjusted p-value < 0.05
>>> LFC > 0 (up)       : 24, 0.14%
>>> LFC < 0 (down)     : 6, 0.036%
>>> outliers [1]       : 0, 0%
>>> low counts [2]     : 0, 0%
>>> (mean count < 0)
>>> [1] see 'cooksCutoff' argument of ?results
>>> [2] see 'independentFiltering' argument of ?results
palette <- brewer.pal(4, "Reds")
par(las = 1, cex.lab = 1.25, cex.main = 1.25)
with(ds.male.ruv.tce, plot(log2FoldChange, -log10(pvalue), pch=20, main="Rats_Control_vs_TCE_Male.pdf", xlim=c(-9,9),
                   ylim=c(0,12), xlab =(Log[2]~Fold~Change), ylab=(-Log[10]~Pvalue), col = "black"))
with(subset(ds.male.ruv.tce, padj<0.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[4]))
with(subset(ds.male.ruv.tce, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[3]))
with(subset(ds.male.ruv.tce, padj<0.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[5]))
legend("topleft", col = "black", fill = c(palette[1], palette[2], palette[3], palette[4]), 
       legend = c("FDR>0.05 and LogFC between -1 & 1" , "FDR>0.05 and LogFC >1  or < -1" , "FDR<0.05 and LogFC between -1 & 1" ,  "FDR<0.05 and LogFC >1  or < -1"))
abline(h=4.1, col="black", lty=3)
abline(v=1, col="black", lty=3)
abline(v=-1, col="black", lty=3)

#Control v. NAC (male)
mcols(ds.male.ruv.nac)
>>> DataFrame with 6 rows and 2 columns
>>>                        type                               description
>>>                 <character>                               <character>
>>> baseMean       intermediate mean of normalized counts for all samples
>>> log2FoldChange      results      log2 fold change (MLE): Trt NAC vs C
>>> lfcSE               results              standard error: Trt NAC vs C
>>> stat                results              Wald statistic: Trt NAC vs C
>>> pvalue              results           Wald test p-value: Trt NAC vs C
>>> padj                results                      BH adjusted p-values
summary(ds.male.ruv.nac,alpha=0.05)
>>> 
>>> out of 16661 with nonzero total read count
>>> adjusted p-value < 0.05
>>> LFC > 0 (up)       : 0, 0%
>>> LFC < 0 (down)     : 0, 0%
>>> outliers [1]       : 0, 0%
>>> low counts [2]     : 0, 0%
>>> (mean count < 0)
>>> [1] see 'cooksCutoff' argument of ?results
>>> [2] see 'independentFiltering' argument of ?results
palette <- brewer.pal(4, "Reds")
par(las = 1, cex.lab = 1.25, cex.main = 1.25)
with(ds.male.ruv.nac, plot(log2FoldChange, -log10(pvalue), pch=20, main="Rats_Control_vs_NAC_Male.pdf", xlim=c(-9,9),
                           ylim=c(0,12), xlab =(Log[2]~Fold~Change), ylab=(-Log[10]~Pvalue), col = "black"))
with(subset(ds.male.ruv.nac, padj<0.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[3]))
with(subset(ds.male.ruv.nac, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[2]))
with(subset(ds.male.ruv.nac, padj<0.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[4]))
legend("topleft", col = "black", fill = c(palette[1], palette[2], palette[3], palette[4]), 
       legend = c("FDR>0.05 and LogFC between -1 & 1" , "FDR>0.05 and LogFC >1  or < -1" , "FDR<0.05 and LogFC between -1 & 1" ,  "FDR<0.05 and LogFC >1  or < -1"))
abline(h=5.2, col="black", lty=3)
abline(v=1, col="black", lty=3)
abline(v=-1, col="black", lty=3)

#Control v. TCE+NAC (male)
palette <- brewer.pal(4, "Reds")
par(las = 1, cex.lab = 1.25, cex.main = 1.25)
with(ds.male.ruv.tce.nac, plot(log2FoldChange, -log10(pvalue), pch=20, main="Rats_Control_vs_TCE_NAC_Male.pdf", xlim=c(-9,9),
                           ylim=c(0,12), xlab =(Log[2]~Fold~Change), ylab=(-Log[10]~Pvalue), col = "black"))
with(subset(ds.male.ruv.tce.nac, padj<0.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[3]))
with(subset(ds.male.ruv.tce.nac, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[2]))
with(subset(ds.male.ruv.tce.nac, padj<0.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[4]))
legend("topleft", col = "black", fill = c(palette[1], palette[2], palette[3], palette[4]), 
       legend = c("FDR>0.05 and LogFC between -1 & 1" , "FDR>0.05 and LogFC >1  or < -1" , "FDR<0.05 and LogFC between -1 & 1" ,  "FDR<0.05 and LogFC >1  or < -1"))
abline(h=3.6, col="black", lty=3)
abline(v=1, col="black", lty=3)
abline(v=-1, col="black", lty=3)

Run DEseq2 Analysis with RUV (females)

ds.ruv.female<- ds.all.ruv[ , ds.all.ruv$Sex == "F" ] #subsetting DESeq2 object by sex
head(ds.ruv.female) #confirming female only samples
>>> class: DESeqDataSet 
>>> dim: 6 24 
>>> metadata(1): version
>>> assays(1): counts
>>> rownames(6): LOC108349479 LOC102556098 ... Raet1e LOC102547056
>>> rowData names(0):
>>> colnames(24): N1_Control_Female N2_Control_Female ... N5_TCE_NAC_Female
>>>   N6_TCE_NAC_Female
>>> colData names(5): Sex Rep Trt dam W_1
ds.ruv.female<- DESeq(ds.ruv.female) # with RUV Does the Empirical Bayes test
ds.ruv.female<- nbinomWaldTest(ds.ruv.female, maxit=10000) # Corrects Empirical Bayes test warning
ds.female.ruv.tce<- results(ds.ruv.female, contrast =c("Trt","TCE","C"), independentFiltering = FALSE)# Extract results table
ds.female.ruv.nac<- results(ds.ruv.female, contrast =c("Trt","NAC","C"), independentFiltering = FALSE) # Extract results table
ds.female.ruv.tce.nac<- results(ds.ruv.female, contrast =c("Trt","TCE_NAC","C"), independentFiltering = FALSE)# Extract results table

Results (females)

#Control v. TCE (female)
mcols(ds.female.ruv.tce)
>>> DataFrame with 6 rows and 2 columns
>>>                        type                               description
>>>                 <character>                               <character>
>>> baseMean       intermediate mean of normalized counts for all samples
>>> log2FoldChange      results      log2 fold change (MLE): Trt TCE vs C
>>> lfcSE               results              standard error: Trt TCE vs C
>>> stat                results              Wald statistic: Trt TCE vs C
>>> pvalue              results           Wald test p-value: Trt TCE vs C
>>> padj                results                      BH adjusted p-values
summary(ds.female.ruv.tce,alpha=0.05)
>>> 
>>> out of 16658 with nonzero total read count
>>> adjusted p-value < 0.05
>>> LFC > 0 (up)       : 388, 2.3%
>>> LFC < 0 (down)     : 91, 0.55%
>>> outliers [1]       : 0, 0%
>>> low counts [2]     : 0, 0%
>>> (mean count < 0)
>>> [1] see 'cooksCutoff' argument of ?results
>>> [2] see 'independentFiltering' argument of ?results
palette <- brewer.pal(4, "Greens")
par(las = 1, cex.lab = 1.25, cex.main = 1.25)
with(ds.female.ruv.tce, plot(log2FoldChange, -log10(pvalue), pch=20, main="Rats_Control_vs_TCE_female.pdf", xlim=c(-9,9),
                           ylim=c(0,12), xlab =(Log[2]~Fold~Change), ylab=(-Log[10]~Pvalue), col = "black"))
with(subset(ds.female.ruv.tce, padj<0.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[3]))
with(subset(ds.female.ruv.tce, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[2]))
with(subset(ds.female.ruv.tce, padj<0.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[4]))
legend("topleft", col = "black", fill = c(palette[1], palette[2], palette[3], palette[4]), 
       legend = c("FDR>0.05 and LogFC between -1 & 1" , "FDR>0.05 and LogFC >1  or < -1" , "FDR<0.05 and LogFC between -1 & 1" ,  "FDR<0.05 and LogFC >1  or < -1"))
abline(h=2.8, col="black", lty=3)
abline(v=1, col="black", lty=3)
abline(v=-1, col="black", lty=3)

#Control v. NAC (female)
mcols(ds.female.ruv.nac)
>>> DataFrame with 6 rows and 2 columns
>>>                        type                               description
>>>                 <character>                               <character>
>>> baseMean       intermediate mean of normalized counts for all samples
>>> log2FoldChange      results      log2 fold change (MLE): Trt NAC vs C
>>> lfcSE               results              standard error: Trt NAC vs C
>>> stat                results              Wald statistic: Trt NAC vs C
>>> pvalue              results           Wald test p-value: Trt NAC vs C
>>> padj                results                      BH adjusted p-values
summary(ds.female.ruv.nac,alpha=0.05)
>>> 
>>> out of 16658 with nonzero total read count
>>> adjusted p-value < 0.05
>>> LFC > 0 (up)       : 0, 0%
>>> LFC < 0 (down)     : 0, 0%
>>> outliers [1]       : 0, 0%
>>> low counts [2]     : 0, 0%
>>> (mean count < 0)
>>> [1] see 'cooksCutoff' argument of ?results
>>> [2] see 'independentFiltering' argument of ?results
palette <- brewer.pal(4, "Greens")
par(las = 1, cex.lab = 1.25, cex.main = 1.25)
with(ds.female.ruv.nac, plot(log2FoldChange, -log10(pvalue), pch=20, main="Rats_Control_vs_NAC_female.pdf", xlim=c(-9,9),
                           ylim=c(0,12), xlab =(Log[2]~Fold~Change), ylab=(-Log[10]~Pvalue), col = "black"))
with(subset(ds.female.ruv.nac, padj<0.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[3]))
with(subset(ds.female.ruv.nac, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[2]))
with(subset(ds.female.ruv.nac, padj<0.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[4]))
legend("topleft", col = "black", fill = c(palette[1], palette[2], palette[3], palette[4]), 
       legend = c("FDR>0.05 and LogFC between -1 & 1" , "FDR>0.05 and LogFC >1  or < -1" , "FDR<0.05 and LogFC between -1 & 1" ,  "FDR<0.05 and LogFC >1  or < -1"))
abline(h=5.3, col="black", lty=3)
abline(v=1, col="black", lty=3)
abline(v=-1, col="black", lty=3)

#Control v. TCE+NAC (male)
mcols(ds.female.ruv.tce.nac)
summary(ds.female.ruv.tce.nac,alpha=0.05)
palette <- brewer.pal(4, "Greens")
par(las = 1, cex.lab = 1.25, cex.main = 1.25)
with(ds.female.ruv.tce.nac, plot(log2FoldChange, -log10(pvalue), pch=20, main="Rats_Control_vs_TCE_NAC_female.pdf", xlim=c(-9,9),
                               ylim=c(0,12), xlab =(Log[2]~Fold~Change), ylab=(-Log[10]~Pvalue), col = "black"))
with(subset(ds.female.ruv.tce.nac, padj<0.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[3]))
with(subset(ds.female.ruv.tce.nac, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[2]))
with(subset(ds.female.ruv.tce.nac, padj<0.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[4]))
legend("topleft", col = "black", fill = c(palette[1], palette[2], palette[3], palette[4]), 
       legend = c("FDR>0.05 and LogFC between -1 & 1" , "FDR>0.05 and LogFC >1  or < -1" , "FDR<0.05 and LogFC between -1 & 1" ,  "FDR<0.05 and LogFC >1  or < -1"))
abline(h=2.75, col="black", lty=3)
abline(v=1, col="black", lty=3)
abline(v=-1, col="black", lty=3)

Data Exploration plots

#MAplots (with RUVr)
DESeq2::plotMA(ds.male.ruv.tce)

#histogram of pvals
hist(ds.male.ruv.tce$pvalue)

#dispersion estimate plots
plotDispEsts(ds.ruv.male, ylim = c(1e-6, 10e1))